#!/usr/bin/python27
#coding=utf8
import urllib2,sys
from bs4 import BeautifulSoup
import re

"""
从http://www.stats.gov.cn/tjsj/tjbz/xzqhdm/201608/t20160809_1386477.html
爬取 省，市，区，县 的数据   现在只是写到文件
Author :Yyb
Date: 20170228
Email: yangyingbo@unimlink.com
"""

def main():
    """Scrape the administrative-division table and emit insert statements.

    Downloads the division-code page, walks every ``<p>`` inside the
    ``div.TRS_PreAppend`` containers, and extracts a numeric code and a name
    from each paragraph.  Each code is classified as:

    * type ``'0'`` (province level) when it is a multiple of 10000; its
      parent code is ``0``;
    * type ``'1'`` (prefecture / city level) when it is a multiple of 100;
      its parent is the most recent province-level code;
    * type ``'2'`` (county level) otherwise; its parent is the most recent
      prefecture-level code, or the province if no prefecture row has been
      seen since that province.

    One ``insert into cvm_city_list`` statement is built per row and the
    whole list is handed to ``run_Sql``.

    Raises whatever ``urllib2.urlopen`` raises when the page cannot be
    fetched.
    """
    html_url = "http://www.stats.gov.cn/tjsj/tjbz/xzqhdm/201608/t20160809_1386477.html"
    res = urllib2.urlopen(html_url)
    try:
        soup = BeautifulSoup(res, 'html.parser')
    finally:
        # Release the connection even if parsing blows up.
        res.close()

    city_list = []
    # Parents are tracked per level.  A single shared "last parent" variable
    # would make every prefecture after the first one in a province point at
    # the previous prefecture instead of at the province.
    province_code = 0
    prefecture_code = 0
    for div_i in soup.find_all('div', 'TRS_PreAppend'):
        for p_i in div_i.find_all('p'):
            # Strip the fixed markup the page wraps around "code ... name" so
            # that the first ">"-separated piece holds the code and the last
            # piece holds the name.
            markup = str(p_i)
            markup = markup.replace('<p class="MsoNormal"><span lang="EN-US">', '')
            markup = markup.replace('<span>     </span></span><span style="font-family: 宋体"', '')
            markup = markup.replace('</span></p>', '')
            pieces = markup.split(">")

            digits = re.sub(r'\D', '', pieces[0])
            if not digits:
                # Paragraph carries no numeric code (e.g. blank or heading
                # line); int('') would raise, so skip it.
                continue
            code = int(digits)
            # Drop full-width (ideographic) spaces used as padding in names.
            name = pieces[-1].replace('　', '')

            if code % 10000 == 0:
                # Province level: top of the hierarchy.
                level = '0'
                parent = 0
                province_code = code
                # Reset so counties listed directly under the province
                # attach to it rather than to a prefecture of the previous
                # province.
                prefecture_code = code
            elif code % 100 == 0:
                # Prefecture / city level: always a child of the province.
                level = '1'
                parent = province_code
                prefecture_code = code
            else:
                # County level.
                level = '2'
                parent = prefecture_code
            city_list.append((code, level, parent, name))

    insert_template = """
        insert into cvm_city_list (city_code,city_code_type,up_city_code,city_name,flag,create_time)
        values (%d,'%s',%d,trim('%s'),'1',now())
        """
    sql_list = []
    for code, level, parent, name in city_list:
        # The name comes from a web page; double any single quote so it
        # cannot terminate the SQL string literal.
        safe_name = name.replace("'", "''")
        sql_list.append(insert_template % (code, level, parent, safe_name))
    run_Sql(sql_list)


def run_Sql(list_sql):
    """Pass a list of SQL statements to the project's ``RunSQl`` helper.

    The statements are given to ``RunSQl.create_sqllist_file``; judging by
    its name it writes them to a file rather than executing them (confirm
    against ``cvm.common.RunSql``).  Direct execution through
    ``run_SqlList`` is intentionally not used here.

    :param list_sql: list of SQL statement strings.
    """
    # Imported lazily so the project package is only needed when SQL is
    # actually emitted.
    from cvm.common.RunSql import RunSQl

    RunSQl().create_sqllist_file(list_sql)


# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()